import pandas as pd
import numpy as np
#to plot the data
import matplotlib.pyplot as plt
import seaborn as sns
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn below.
plt.style.use('fivethirtyeight')
# IPython/Jupyter magic (not plain Python): render figures inline in the notebook.
%matplotlib inline
# The input files carry no column names, so they are assembled here.
dependent_var = ["RUL"]
index_columns_names = ["UnitNumber", "Cycle"]
operational_settings_columns_names = [f"mode{n}" for n in range(1, 4)]
sensor_measure_columns_names = [f"sensor{n}" for n in range(1, 22)]
# Column order used when reading the files: index columns, then modes, then sensors.
input_file_column_names = [
    *index_columns_names,
    *operational_settings_columns_names,
    *sensor_measure_columns_names,
]
# Import the training dataset and calculate the remaining useful life (RUL).
# sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True keyword.
# NOTE(review): the path is an absolute, machine-specific location.
df_train = pd.read_csv(
    'C:\\Users\\Madhu Shree\\OneDrive\\Desktop\\Generative Model\\data\\CMAPSSData\\train_FD001.txt',
    sep=r'\s+',
    names=input_file_column_names,
)

# Last recorded cycle of every unit, as a two-column frame: UnitNumber, max.
rul = (
    df_train.groupby('UnitNumber')['Cycle']
    .max()
    .rename('max')
    .reset_index()
)

# RUL of a row = last cycle of its unit minus the row's own cycle.
df_train = df_train.merge(rul, on='UnitNumber', how='left')
df_train['RUL'] = df_train['max'] - df_train['Cycle']
# The helper column is only needed to compute RUL.
df_train = df_train.drop(columns='max')
C:\Users\Madhu Shree\AppData\Local\Temp\ipykernel_9432\2559281974.py:5: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead
df_train = pd.read_csv('C:\\Users\\Madhu Shree\\OneDrive\\Desktop\\Generative Model\\data\\CMAPSSData\\train_FD001.txt',delim_whitespace=True,names=input_file_column_names)
# Preview the first rows of the training frame, including the derived RUL column.
df_train.head()
| UnitNumber | Cycle | mode1 | mode2 | mode3 | sensor1 | sensor2 | sensor3 | sensor4 | sensor5 | ... | sensor13 | sensor14 | sensor15 | sensor16 | sensor17 | sensor18 | sensor19 | sensor20 | sensor21 | RUL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | -0.0007 | -0.0004 | 100.0 | 518.67 | 641.82 | 1589.70 | 1400.60 | 14.62 | ... | 2388.02 | 8138.62 | 8.4195 | 0.03 | 392 | 2388 | 100.0 | 39.06 | 23.4190 | 191 |
| 1 | 1 | 2 | 0.0019 | -0.0003 | 100.0 | 518.67 | 642.15 | 1591.82 | 1403.14 | 14.62 | ... | 2388.07 | 8131.49 | 8.4318 | 0.03 | 392 | 2388 | 100.0 | 39.00 | 23.4236 | 190 |
| 2 | 1 | 3 | -0.0043 | 0.0003 | 100.0 | 518.67 | 642.35 | 1587.99 | 1404.20 | 14.62 | ... | 2388.03 | 8133.23 | 8.4178 | 0.03 | 390 | 2388 | 100.0 | 38.95 | 23.3442 | 189 |
| 3 | 1 | 4 | 0.0007 | 0.0000 | 100.0 | 518.67 | 642.35 | 1582.79 | 1401.87 | 14.62 | ... | 2388.08 | 8133.83 | 8.3682 | 0.03 | 392 | 2388 | 100.0 | 38.88 | 23.3739 | 188 |
| 4 | 1 | 5 | -0.0019 | -0.0002 | 100.0 | 518.67 | 642.37 | 1582.85 | 1406.22 | 14.62 | ... | 2388.04 | 8133.80 | 8.4294 | 0.03 | 393 | 2388 | 100.0 | 38.90 | 23.4044 | 187 |
5 rows × 27 columns
# Import the test dataset; it is read with the same column names as the training file.
# sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True keyword.
df_test = pd.read_csv(
    'C:\\Users\\Madhu Shree\\OneDrive\\Desktop\\Generative Model\\data\\CMAPSSData\\test_FD001.txt',
    sep=r'\s+',
    names=input_file_column_names,
)
df_test.head()
C:\Users\Madhu Shree\AppData\Local\Temp\ipykernel_9432\1464558967.py:3: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead
df_test = pd.read_csv('C:\\Users\\Madhu Shree\\OneDrive\\Desktop\\Generative Model\\data\\CMAPSSData\\test_FD001.txt', delim_whitespace=True, names=input_file_column_names)
| UnitNumber | Cycle | mode1 | mode2 | mode3 | sensor1 | sensor2 | sensor3 | sensor4 | sensor5 | ... | sensor12 | sensor13 | sensor14 | sensor15 | sensor16 | sensor17 | sensor18 | sensor19 | sensor20 | sensor21 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 0.0023 | 0.0003 | 100.0 | 518.67 | 643.02 | 1585.29 | 1398.21 | 14.62 | ... | 521.72 | 2388.03 | 8125.55 | 8.4052 | 0.03 | 392 | 2388 | 100.0 | 38.86 | 23.3735 |
| 1 | 1 | 2 | -0.0027 | -0.0003 | 100.0 | 518.67 | 641.71 | 1588.45 | 1395.42 | 14.62 | ... | 522.16 | 2388.06 | 8139.62 | 8.3803 | 0.03 | 393 | 2388 | 100.0 | 39.02 | 23.3916 |
| 2 | 1 | 3 | 0.0003 | 0.0001 | 100.0 | 518.67 | 642.46 | 1586.94 | 1401.34 | 14.62 | ... | 521.97 | 2388.03 | 8130.10 | 8.4441 | 0.03 | 393 | 2388 | 100.0 | 39.08 | 23.4166 |
| 3 | 1 | 4 | 0.0042 | 0.0000 | 100.0 | 518.67 | 642.44 | 1584.12 | 1406.42 | 14.62 | ... | 521.38 | 2388.05 | 8132.90 | 8.3917 | 0.03 | 391 | 2388 | 100.0 | 39.00 | 23.3737 |
| 4 | 1 | 5 | 0.0014 | 0.0000 | 100.0 | 518.67 | 642.51 | 1587.19 | 1401.92 | 14.62 | ... | 522.15 | 2388.03 | 8129.54 | 8.4031 | 0.03 | 390 | 2388 | 100.0 | 38.99 | 23.4130 |
5 rows × 26 columns
# Import the true RUL of the engines in the test data (read as a single RUL column).
# sep=r'\s+' splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True keyword.
y_true = pd.read_csv(
    'C:\\Users\\Madhu Shree\\OneDrive\\Desktop\\Generative Model\\data\\CMAPSSData\\RUL_FD001.txt',
    sep=r'\s+',
    names=["RUL"],
)
# NOTE(review): the default index is 0-based, while UnitNumber in the train/test
# frames starts at 1. Confirm whether this should be `y_true.index + 1` before
# joining on UnitNumber.
y_true["UnitNumber"] = y_true.index
y_true.head()
C:\Users\Madhu Shree\AppData\Local\Temp\ipykernel_9432\567814418.py:5: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead
y_true = pd.read_csv('C:\\Users\\Madhu Shree\\OneDrive\\Desktop\\Generative Model\\data\\CMAPSSData\\RUL_FD001.txt',delim_whitespace=True,names=["RUL"])
| RUL | UnitNumber | |
|---|---|---|
| 0 | 112 | 0 |
| 1 | 98 | 1 |
| 2 | 69 | 2 |
| 3 | 82 | 3 |
| 4 | 91 | 4 |
# Shape of the training data as (rows, columns)
df_train.shape
(20631, 27)
# Count the missing values in each column of the training data
df_train.isnull().sum()
UnitNumber 0 Cycle 0 mode1 0 mode2 0 mode3 0 sensor1 0 sensor2 0 sensor3 0 sensor4 0 sensor5 0 sensor6 0 sensor7 0 sensor8 0 sensor9 0 sensor10 0 sensor11 0 sensor12 0 sensor13 0 sensor14 0 sensor15 0 sensor16 0 sensor17 0 sensor18 0 sensor19 0 sensor20 0 sensor21 0 RUL 0 dtype: int64
# Distinct unit numbers present in the training data
df_train.UnitNumber.unique()
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,
79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100], dtype=int64)
# Five engines with the highest final cycle count, longest-lived first
(
    df_train.groupby("UnitNumber")[["Cycle"]]
    .max()
    .sort_values(by="Cycle", ascending=False)
    .head(5)
)
| Cycle | |
|---|---|
| UnitNumber | |
| 69 | 362 |
| 92 | 341 |
| 96 | 336 |
| 67 | 313 |
| 83 | 293 |
# Visualize the three mode settings of unit 2, one panel per mode, each smoothed
# with a 5-row rolling mean.
fig, ax = plt.subplots(1, 3, figsize=(30, 8), sharex='all')
# The unit's rows are the same for every panel, so select them once.
# (The name df_u1 is kept unchanged even though it holds unit 2.)
df_u1 = df_train.query('UnitNumber==2').reset_index(drop=True)
for i in range(0, 3):
    mode_name = 'mode' + str(i + 1)
    # The column is overwritten on each pass; after the loop it holds mode3's average.
    df_u1['rolling_avg'] = df_u1[mode_name].rolling(window=5).mean()
    df_u1['rolling_avg'].plot(kind="line", ax=ax[i])
    ax[i].set_title(mode_name)
    ax[i].set_xlabel("Cycle")
# mode1: 5-row rolling mean for units 1 to 15, one panel per unit on a 5x3 grid
fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(25, 25))
ax = axes.ravel()
for unit, panel in enumerate(ax, start=1):
    unit_rows = df_train["UnitNumber"] == unit
    temp = df_train.loc[unit_rows, "mode1"].reset_index(drop=True)
    temp = temp.rolling(window=5).mean()
    temp.plot(kind="line", ax=panel)
    panel.set_title(f"Unit{unit}")
    panel.set_xlabel("Cycle")
# mode2: 5-row rolling mean for units 1 to 15, one panel per unit on a 5x3 grid
fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(25, 25))
ax = axes.ravel()
for unit, panel in enumerate(ax, start=1):
    unit_rows = df_train["UnitNumber"] == unit
    temp = df_train.loc[unit_rows, "mode2"].reset_index(drop=True)
    temp = temp.rolling(window=5).mean()
    temp.plot(kind="line", ax=panel)
    panel.set_title(f"Unit{unit}")
    panel.set_xlabel("Cycle")
# mode3: 5-row rolling mean for units 1 to 15, one panel per unit on a 5x3 grid
fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(25, 25))
ax = axes.ravel()
for unit, panel in enumerate(ax, start=1):
    unit_rows = df_train["UnitNumber"] == unit
    temp = df_train.loc[unit_rows, "mode3"].reset_index(drop=True)
    temp = temp.rolling(window=5).mean()
    temp.plot(kind="line", ax=panel)
    panel.set_title(f"Unit{unit}")
    panel.set_xlabel("Cycle")
# Plot the 21 sensor traces of unit 5 against the cycle number on a 7x3 grid.
fig, ax = plt.subplots(7, 3, figsize=(30, 20), sharex=True)
df_u1 = df_train.query('UnitNumber==5')
cycles = df_u1.Cycle.values
# ravel() walks the grid row by row, so panels follow sensor1..sensor21 order.
for c, panel in enumerate(ax.ravel(), start=1):
    sensor_name = 'sensor' + str(c)
    panel.plot(cycles, df_u1[sensor_name])
    panel.set_title(sensor_name)
    panel.axvline(0, c='r')  # red vertical line at x = 0
plt.suptitle('Sensor Traces: Unit 5', fontsize=50)
plt.show()
# Sensor measure 6: raw readings for units 1 to 15, one panel per unit on a 5x3 grid
fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(25, 25))
ax = axes.ravel()
for unit, panel in enumerate(ax, start=1):
    unit_rows = df_train["UnitNumber"] == unit
    temp = df_train.loc[unit_rows, "sensor6"].reset_index(drop=True)
    temp.plot(kind="line", ax=panel)
    panel.set_title(f"Unit{unit}")
    panel.set_xlabel("Cycle")
# Keep only the sensor features needed for the analysis.
# The exclusion entries must match the column names exactly ("sensorN"); a
# spelling such as "sensorsN" matches nothing and silently keeps every sensor.
not_required_feats = ["sensor1", "sensor5", "sensor6", "sensor10",
                      "sensor16", "sensor18", "sensor19"]
feats = [feat for feat in sensor_measure_columns_names if feat not in not_required_feats]
feats
['sensor1', 'sensor2', 'sensor3', 'sensor4', 'sensor5', 'sensor6', 'sensor7', 'sensor8', 'sensor9', 'sensor10', 'sensor11', 'sensor12', 'sensor13', 'sensor14', 'sensor15', 'sensor16', 'sensor17', 'sensor18', 'sensor19', 'sensor20', 'sensor21']
# Correlation analysis: pairwise correlations among the selected features and RUL,
# drawn as an annotated heatmap on a 12x12 figure.
corr = df_train[[*feats, "RUL"]].corr()
fig, ax = plt.subplots(figsize=(12, 12))
ax = sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)